# -*- coding: utf-8 -*-
# Author: methylDragon (methylDragon.com)
# "Raa."
In [7]:
#NLP
from pattern.web import Twitter
from textblob import TextBlob
import nltk.data
from nltk.tokenize import word_tokenize, sent_tokenize
#NLTK RESOURCE DOWNLOADING
try:
nltk.data.find('tokenizers/punkt')
except LookupError:
nltk.download('punkt')
#PARSER
from newspaper import Article
import newspaper
#set tokenizer model
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
In [39]:
#list, download, and parse article
a = Article("http://www.theindependent.sg/the-rule-of-law-has-been-mercilessly-mocked-and-denigrated-chee-soon-juan-on-pe2017/", language='en') # Set language as english
a.download()
a.parse()
a.nlp()
In [40]:
#Quality of life bold delimiters
b = "\033[1m"
endb = "\033[0;0m"
#START OF ARTICLE
print(b + "START OF ARTICLE - START OF ARTICLE - START OF ARTICLE - START OF ARTICLE - START OF ARTICLE" + endb)
#Overall sentiment trackers
count = 0
overallScore = [0.0,0.0]
#Print meta-data
print("\n-----\n" + b + "METADATA" + endb + "\n-----")
print(b + "Title: " + endb, end="")
print(a.title)
print(b + "Author(s): " + endb, end="")
print(a.authors)
print(b + "Keywords: " + endb, end="")
print(a.keywords)
print(b + "Date: " + endb, end="")
print(a.publish_date)
print(b + "Top Image: " + endb, end="")
print(a.top_image)
print(b + "Videos: " + endb, end="")
print(a.movies)
#Print summary
print("\n-----\n" + b + "SUMMARY" + endb + "\n-----")
print(a.summary)
#Split article into sentences
print("\n-----\n" + b + "ANALYSIS" + endb + "\n-----")
for index, token in enumerate(tokenizer.tokenize(a.text)):
analysis = TextBlob(tokenizer.tokenize(a.text)[index])
# analysis.correct() #Correct mispelt words !!! IF YOU ACTIVATE THIS IT'LL BE SLOW
print(tokenizer.tokenize(a.text)[index] + b)
#and for each sentence, analyze sentiment
print(analysis.sentiment)
#Prep overall analysis tracker, IGNORE if parameters are [0.0, 0.0] for sentence
if analysis.sentiment.polarity != 0.0 and analysis.sentiment.subjectivity != 0.0:
count += 1
overallScore[0] += analysis.sentiment.polarity
overallScore[1] += analysis.sentiment.subjectivity
print(endb + "-----")
#Guarding against divisions by 0
if count == 0:
count = 1
#Print overall sentiment
print("\n-----\n" + b + "OVERALL SENTIMENT" + endb + "\n-----")
#print(TextBlob(a.text).sentiment)
print(b + "Polarity: " + endb, end="")
print(overallScore[0]/count, end=" | ")
print(b + "Subjectivity: " + endb, end="")
print(overallScore[1]/count, end="")
print(endb + "\n")
print(b + "END OF ARTICLE - END OF ARTICLE - END OF ARTICLE - END OF ARTICLE - END OF ARTICLE" + endb)
In [41]:
teehee = 0.13123
print("{0:.2f}".format(teehee))
In [ ]: